* Run the shared country configuration script.
* NOTE(review): presumably this is where $invtcountrylist gets defined -- confirm in country_list.do
do "${d}code/config/country_list.do"

* Copy the inventor-country list into the global that the loops below iterate over.
* A plain macro copy (no "=") is used on purpose: with "=" the right-hand side is
* evaluated as a string expression, which can truncate long lists in older Stata versions.
global ctrylistX "$invtcountrylist"

* For each patent list (division), build a firm-year dataset of fractional biadic
* patent-family counts by inventor country: rows are BvDIDnumber-year, columns are
* nb_pat_invt_<country> for every country in $ctrylistX.
* One file per division is saved as bvd_year_inventor_count_<division>_bia.dta.
foreach division in auto95 pauto95 {

    * load our firms and patent applications and restrict to relevant ones
    * NOTE(review): ipr_type==2 presumably selects one kind of IP right -- confirm the coding
    use "${d}datasets/patstat_orbis/Orbis_patents_list_2017_merged.dta", clear
    keep if ipr_type==2
    mmerge appln_id using "${d}datasets/common_data/patstat_2018b/appln_info.dta", unmatched(master) ukeep(granted)
    keep BvDIDnumber appln_id granted
    * keep only applications that appear in this division's patent list
    mmerge appln_id using "${d}datasets/patent_list/`division'_patents.dta", unmatched(none)

    * merge in the inventor country and do the usual correction for GER/GDR, RUS/USSR
    mmerge appln_id using "${d}datasets/common_data/patstat_2018b/appln_inventor_country_nomissing.dta", unmatched(none)
    do "${d}code/config/auth_map.do" invt_country

    * merge in the grouping by patent family and biadic information, restrict to biadic ones
    mmerge appln_id using "${d}datasets/common_data/patstat_2018b/family_info.dta", unmatched(master) ukeep(docdb_family_id fam_earliest_appln_year)
    drop appln_id _merge
    mmerge docdb_family_id using "${d}datasets/patstat_orbis/docdb_families2.dta", unmatched(master) ukeep(biadic_D)
    keep if biadic_D == 1

    * Split each patent family equally across the distinct inventor countries observed
    * for a given firm (BvDIDnumber).
    * Example: if a patent family-firm combination has 3 inventors, 2 from country A and
    * 1 from country B, through applications belonging to that firm, then country A and
    * country B each get 0.5 of the patent family assigned.
    * Only applications belonging to the firm are considered. So if a patent family
    * belongs to multiple firms through different applications with different inventors,
    * the family is distributed once per firm, and the resulting country split need not
    * be the same for the two firms.

    * one row per firm x family x inventor country
    duplicates drop BvDIDnumber docdb_family_id invt_country fam_earliest_appln_year, force
    ren fam_earliest_appln_year year

    * after the de-duplication, the group size _N is the number of distinct inventor
    * countries of the family within the firm; its inverse is each country's share.
    * Stored as double so that shares such as 1/3 are not rounded to float precision.
    bysort docdb_family_id BvDIDnumber: gen double inv_nb_invt_ctries = 1/_N

    * distribute the share to the matching country and sum it up per firm-year
    foreach ctry in $ctrylistX {
        gen double invt_`ctry' = (invt_country == "`ctry'") * inv_nb_invt_ctries
        bysort BvDIDnumber year: egen double nb_pat_invt_`ctry' = total(invt_`ctry')
        drop invt_`ctry'
    }

    * the totals are constant within firm-year, so dropping duplicates leaves one row per firm-year
    keep BvDIDnumber year nb_pat_invt_*
    duplicates drop
    compress
    save "${d}datasets/macrosim/bvd_year_inventor_count_`division'_bia.dta", replace
}

* Add the per-country counts of the two divisions (pauto95 and auto95) together
* into one firm-year dataset, saved as bvd_year_inventor_count_tfa_bia.dta.
use "${d}datasets/macrosim/bvd_year_inventor_count_pauto95_bia.dta", clear
* suffix the pauto95 columns with _p so they do not clash with the auto95 columns
ren nb_pat_invt_* nb_pat_invt_*_p

* unmatched(both): keep firm-years that appear in only one of the two files.
* With unmatched(master), firm-years present only in the auto95 file were dropped,
* so they were missing from the combined counts.
mmerge BvDIDnumber year using "${d}datasets/macrosim/bvd_year_inventor_count_auto95_bia.dta", unmatched(both)

foreach ctry in $ctrylistX {
    * the columns that just came in from the auto95 file get the suffix _a
    ren nb_pat_invt_`ctry' nb_pat_invt_`ctry'_a
    * a firm-year found in only one file has missing counts for the other division;
    * treat those as zero, otherwise the addition below produces missing values
    replace nb_pat_invt_`ctry'_a = 0 if missing(nb_pat_invt_`ctry'_a) & !missing(nb_pat_invt_`ctry'_p)
    replace nb_pat_invt_`ctry'_p = 0 if missing(nb_pat_invt_`ctry'_p) & !missing(nb_pat_invt_`ctry'_a)
    * double keeps the fractional counts from being rounded to float precision
    gen double nb_pat_invt_`ctry' = nb_pat_invt_`ctry'_a + nb_pat_invt_`ctry'_p
    drop nb_pat_invt_`ctry'_a nb_pat_invt_`ctry'_p
}

save "${d}datasets/macrosim/bvd_year_inventor_count_tfa_bia.dta", replace